import pickle
from multiprocessing import Pool

from maxminddb import open_database, MODE_MEMORY

from crawleth.config import ADDED_POOL, COMPLETED_UP_POOL, ASKED_POOL, EXPORT_POOLS
from crawleth.data_management.geo import gather_node_information


def _open_geoip(path):
    """Open a MaxMind database fully into memory; return None if the file is absent."""
    try:
        return open_database(path, mode=MODE_MEMORY)
    except FileNotFoundError:
        return None


def _export(redis_db, logger):
    """Collect enriched node information for every pool in EXPORT_POOLS.

    For each pool, loads the pickled node records stored in Redis, enriches
    them (GeoIP city + ASN lookups) in parallel via ``gather_node_information``,
    and returns the result as ``{pool_name: [node_info, ...]}``.  ``None``
    results from the enrichment step are dropped.

    :param redis_db: Redis client holding the crawl pools and node records.
    :param logger: logger used for progress/statistics messages.
    :return: dict mapping each pool name to its list of gathered node info.
    """
    # Lazy %-style args: the message is only formatted if the level is enabled.
    logger.info("%s nodes added", redis_db.scard(ADDED_POOL))
    logger.info(
        "%s nodes fully crawled out of %s asked nodes. If the difference is too high, "
        "consider reducing (NO)CRAWL_MAX_NODES_TO_WAIT",
        redis_db.scard(COMPLETED_UP_POOL), redis_db.scard(ASKED_POOL))

    # Either reader may be None when its .mmdb file is missing; downstream
    # code (gather_node_information) is expected to tolerate that.
    geoip = _open_geoip('geoip/GeoLite2-City.mmdb')
    geoip2 = _open_geoip('geoip/GeoLite2-ASN.mmdb')

    data = {}
    try:
        for pool in EXPORT_POOLS:
            # Fetch each member's record once and skip missing keys, instead of
            # the racy exists()-then-get() pattern (a key expiring in between
            # would make pickle.loads(None) raise).
            # NOTE(review): pickle.loads on Redis payloads is only safe if the
            # Redis instance is trusted — do not point this at untrusted data.
            nodes = [(geoip, geoip2, pickle.loads(blob))
                     for blob in (redis_db.get(key) for key in redis_db.smembers(pool))
                     if blob is not None]

            # Enrichment is parallelized across processes; None results excluded.
            if nodes:
                with Pool() as multiprocessing_pool:
                    data[pool] = [n for n in multiprocessing_pool.map(gather_node_information, nodes)
                                  if n is not None]
            else:
                data[pool] = []

            logger.info("Exported %d entries for %s", len(nodes), pool)
    finally:
        # Close the MaxMind readers — the original leaked them.
        for reader in (geoip, geoip2):
            if reader is not None:
                reader.close()

    return data
